* dependencies: reghdfe, reghdfejl, estout, coefplot, blindschemes, grc1leg2

if c(os)=="Windows" {
  global home D:/OneDrive - Open Philanthropy Project/Connectivity/Hjort & Poulsen
  global HPdata D:/OneDrive/Documents/Work/Library/Connectivity/Hjort and Poulsen 2019  // H&P public archive
}
else {
  global home /Users/davidroodman/Library/CloudStorage/OneDrive-OpenPhilanthropyProject/Connectivity/Hjort & Poulsen
  global HPdata /Users/davidroodman/Library/CloudStorage/OneDrive-Personal/Documents/Work/Library/Connectivity/Hjort and Poulsen 2019  // H&P public archive
}

cd "$home"
set scheme plottig
graph set window fontface "LM Roman 9"  // https://www.1001fonts.com/latin-modern-roman-font.html
collect create tstats, replace  // use Stata collect facility for t stat table

* one-time country border shapefile conversion
// cd "$home/Africa countries shapefile"
// spshape2dta afr_g2014_2013_0, replace  // https://geoportal.icpac.net/layers/geonode%3Aafr_g2014_2013_0
// cd "$home"

* get HP19 fiber map in form for plotting. Assigns proper country codes to segments of multi-country geometries; combines each country's geometries into single entry
// cap odbc load, dsn("Hjort & Poulsen 2019") clear exec("SELECT upper(iso2) as country, ogr_geometry.STAsText() as wkt FROM HP.[terrestrial smerged]")
// if !_rc saveold "replication/terrestrial smerged", replace

* get country boundaries for plotting
cap program drop getborders
program define getborders
  preserve
  gen n = _n
  collapse (first) n, by(ISO2)
  merge 1:m ISO2 using "$home/Africa countries shapefile/afr_g2014_2013_0", nogen keepusing(_ID) keep(match)
  merge 1:m  _ID using "$home/Africa countries shapefile/afr_g2014_2013_0_shp", nogen keepusing (_?) keep(match)
  keep if _Y > -40  // exclude Marion Island
  mata mapcountry=st_sdata(.,"ISO2"); maplong=st_data(.,"_X"); maplat=st_data(.,"_Y")
  restore
  getmata mapcountry maplong maplat, force replace
end

* convert a WKT description of a polygon to a Mata matrix
cap mata mata drop wkt2coords()
mata
mata set matastrict off
function wkt2coords(country, wkt) {
  coords = J(0,2,0); netcountry = J(0,1,"")
  for (c=1; c<=rows(wkt); c++) {
    _coords = colshape(strtoreal(tokens(subinstr(subinstr(subinstr(subinstr(subinstr(wkt[c], ",", ""), "(", ""), ")", " . . "), "MULTILINESTRING",""), "LINESTRING",""))), 2)
    netcountry = netcountry \ J(rows(_coords), 1, country[c])
    coords = coords \ _coords
  }
  return (&netcountry \ &coords)
}
mata mlib create lHP19, replace
mata mlib add lHP19 *()
mata mlib index
end

cap program drop getnet
program define getnet
  preserve
  gen n = _n
  collapse (first) n, by(ISO2)
  merge 1:m ISO2 using "$home/replication/terrestrial smerged", nogen keep(match)
  mata t = wkt2coords(st_sdata(.,"ISO2"), st_sdata(.,"wkt")); netcountry = *t[1]; coords = *t[2]
  restore
  getmata netcountry (netlong netlat)=coords, force replace
end


* maps comparing original and new connectedness classifications
cap program drop MapConnectedness
program define MapConnectedness
  cap drop connectedNetHP
  gen byte connectedNetHP = distanceNetHP < .005 if distanceNetHP<.
  cap drop connectedNetHPNew
  gen byte connectedNetHPNew = distanceNetHPNew < .005 if distanceNetHPNew<.

  getnet
  getborders

  levelsof ISO2, clean
  foreach c in `r(levels)' {
    levelsof country_name if ISO2=="`c'", local(title)
    line maplat maplong if mapcountry=="`c'", cmissing(n) xscale(off) yscale(off) ylab(none) xlab(none) lcolor(gs12) || ///
      line netlat netlong if netcountry=="`c'", cmissing(n) xscale(off) yscale(off) ylab(none) xlab(none) lcolor(black) || ///
      scatter latitude longitude if connectedNetHPNew==0 & connectedNetHP==0 & ISO2=="`c'", msize(medsmall) msym(Oh) || ///
      scatter latitude longitude if connectedNetHPNew==1 & connectedNetHP==1 & ISO2=="`c'", msize(medsmall) msym(Oh) || ///
      scatter latitude longitude if connectedNetHPNew==0 & connectedNetHP==1 & ISO2=="`c'", msize(medsmall) || ///
      scatter latitude longitude if connectedNetHPNew==1 & connectedNetHP==0 & ISO2=="`c'", msize(medsmall) ///
      legend(order(4 3 5 6) lab(4 `"Agree: "connected""') lab(3 `"Agree: "not connected""') lab(5 `"Disagree: "connected" in HP19"') lab(6 `"Disagree: "not connected" in HP19"') keygap(1)) ///
      name(`c', replace) title(`title')
  }
  levelsof ISO2, clean
  grc1leg2 `r(levels)', lrows(1) graphregion(margin(zero)) legscale(*1) imargin(zero) iscale(*1.5) name(`1'ConnMap, replace)
  graph export Replication/`1'ConnMap.png, width(2000) replace
end

* map comparing geocodings
cap program drop MapCoordinates
program define MapCoordinates
  getborders
  levelsof ISO2, clean
  local levels `r(levels)'
  foreach c in `levels' {
    levelsof country_name if ISO2=="`c'", local(title)
    line maplat maplong if mapcountry=="`c'", cmissing(no) lcolor(gs12) || ///
         pcspike latitudeHP longitudeHP latitude longitude if ISO2=="`c'" || ///
         scatter latitude longitude if ISO2=="`c'", xscale(off) yscale(off) ylab(none) xlab(none) msize(.2) ///
         title(`title') graphregion(margin(zero)) legend(off) name(`c', replace)
  }
  graph combine `levels', graphregion(margin(zero)) imargin(zero) iscale(*1.5) name(AfroLocMap, replace)
  graph export Replication/`1'LocMap.png, replace width(2000)
end

// hack: run schooling-interactions regression, then rotate & save results 4 times so the estimates for each schooling group slot into "Not primary" in turn, for easy harvesting by esttab
cap program drop estrot
program define estrot, eclass
  gettoken estname cmdline: 0, parse(": ")
  gettoken colon cmdline: cmdline, parse(": ")
  `cmdline'  // run estimate
  tempname b V
  mata p = 2, 3, 4, 1, 5..`=colsof(e(b))'
  forvalues e=0/3 {
    cap    sum `e'.edcat if e(sample), meanonly
    if _rc sum `e'.edcatHP if e(sample), meanonly
    estadd scalar mean=r(mean), replace
    local edcat: label edcats `e'
    eststo `estname'`edcat'
    estadd local depvar `edcat', replace  // put in fake depvar to better label table cols
    mat `b' = e(b)
    mat `V' = e(V)
    mata st_matrix("`b'", st_matrix("`b'")[p])
    mata st_matrix("`V'", st_matrix("`V'")[p,p])
    ereturn repost b=`b' V=`V'
  }
end

* plots like HP19 Figure 4
cap program drop Fig4
program define Fig4
  syntax [if/], timevar(string) revisedtimevar(string) cluster(passthru) REVISEDCLUSTer(string) a(string) reviseda(string) dataset(string) depvar(string) reviseddepvar(string) [revisedif(string) *]
  if `"`if'"' != "" local if & `if'

  cap mat drop bHP
  cap mat drop seHP
  cap mat drop bNew
  cap mat drop seNew
  mata st_matrix("atHP" , range(400,5000,100)':-25)
  mata st_matrix("atNew", range(400,5000,100)':+25)
  cap drop connected
  gen byte connected = .

  qui forvalues Tlim=400(100)5000 {
    replace connected = distanceNetHP < `Tlim'/111320
    reghdfejl `depvar'        1.submarines#1.connected if distanceNetHP   <.1        `if',`cluster'                  a(`a'                         `timevar') nosamp keepsing
    mat bHP  = nullmat( bHP),  _b[1.submarines#1.connected]
    mat seHP = nullmat(seHP), _se[1.submarines#1.connected]

    replace connected = distanceNetHPNew < `Tlim'/111320
    reghdfejl `reviseddepvar' 1.submarines#1.connected if distanceNetHPNew<.1 `revisedif', cluster(`revisedcluster') a(`reviseda' countryid#`revisedtimevar') nosamp
    mat bNew  = nullmat( bNew),  _b[1.submarines#1.connected]
    mat seNew = nullmat(seNew), _se[1.submarines#1.connected]
  }
  coefplot (mat(bHP), se(seHP) at(atHP) pstyle(p2)) (mat(bNew), se(seNew) at(atNew) pstyle(p5)), msize(small) ciopt(lwidth(medium)) yline(0, lcolor(gs8) lpat(solid)) ///
    xlab(0(1000)5000) ylab(,format(%4.2f)) plotregion(margin(zero)) xscale(range(0 5000)) xtitle("Connection radius (meters)", margin(tiny)) `options' ///
    legend(order(2 4) lab(2 HP19) lab(4 Revised)) name(Fig4`dataset', replace) xline(`=.005*111320', lcolor(gs8) lpat(solid))
end


***
*** Akamai
***

{
cap noi odbc load, clear dsn("Hjort & Poulsen 2019") exec("select [country name], a.* from Akamai.[Analysis data] a join [country concordance] cc on a.iso2 = cc.iso2")
if !_rc saveold "replication/Akamai", replace

use "replication/Akamai" if inlist(ISO2,"KE","MZ","ZA","TZ","BJ","GH") | inlist(ISO2,"MG","NG","SN","TG","CD","NA"), clear
replace time = yq(floor(time/10), mod(time,10))
bysort ISO2: egen subdate = min(time / submarines)
format %tq time subdate

gen lnbwavg = ln(bwavg)
cap label drop countryid
egen countryid = group(country_name), label
gen loginternet = ln(bwavg)
gen asinhinternet = asinh(bwavg)

replace distanceNetHPNew = min(distanceNetHPNew, distanceNetHPNewNbr)
replace distanceNode = min(distanceNode, distanceNodeNbr)

// keep if inlist(ISO2,"BJ","CD","KE","MG","NG","ZA","TZ")  // only these contribute identification

gen byte bigHP = inlist(city, "ABIDJAN", "ABOMEY", "ABUJA", "ACCRA", "ADISABEBA", "ANTANANARIVO", "ARUSHA", "BATA") | ///
                 inlist(city, "BEIRA", "BLANTYRE", "BLOEMFONTEIN", "BO", "BOBODIOULASSO", "BOSASO", "BOUAKE", "BRAZZAVILLE") | ///
                 inlist(city, "BUJUMBURA", "BULAWAYO", "CAMAYENNE", "CAPETOWN", "CASABLANCA", "CLAREMONT", "CONAKRY", "COTONOU") | ///
                 inlist(city, "CUREPIPE", "DAKAR", "DARESSALAAM", "DJIBOUTI", "DJOUGOU", "DOUALA", "DURBAN", "FES") | ///
                 inlist(city, "FRANCEVILLE", "FRANCISTOWN", "FREETOWN", "GABORONE", "GREENPOINT", "HARARE", "HARGEISA", "HELDERBERG") | ///
                 inlist(city, "HUAMBO", "IBADAN", "JOHANNESBURG", "KAMPALA", "KANO", "KHARTOUM", "KIGALI", "KINSHASA") | ///
                 inlist(city, "KISUMU", "KITWE", "KUMASI", "KWAZULU", "LAGOS", "LIBREVILLE", "LILONGWE", "LOBITO") | ///
                 inlist(city, "LOME", "LONDON", "LUANDA", "LUBUMBASHI", "LUSAKA", "LYNNWOOD", "MALABO", "MAPUTO") | ///
                 inlist(city, "MARSHALLTOWN", "MBUJIMAYI", "MEDUNSA", "MIDRAND", "MODDERFONTEIN", "MOGADISHU", "MOMBASA", "MONROVIA") | ///
                 inlist(city, "MORONI", "MPUMALANGA", "MWANZA", "NAIROBI", "NDOLA", "NEVES", "NEWLANDS", "NOUADHIBOU") | ///
                 inlist(city, "NOUAKCHOTT", "NZEREKORE", "OUAGADOUGOU", "PARKVIEW", "POINTENOIRE", "PORTGENTIL", "PORTLOUIS", "PORTSUDAN") | ///
                 inlist(city, "PRAIA", "PRETORIA", "RABAT", "RIVONIA", "ROSSLYN", "SAOTOME", "SERREKUNDA", "SILVERTON") | ///
                 inlist(city, "TAMALE", "VACOAS", "WALVISBAY", "WAVERLEY", "WAVERLY", "WINDHOEK", "WYNBERG", "YAOUNDE")

// https://data.un.org/Handlers/DownloadHandler.ashx?DataFilter=tableCode:240;countryCode:178,204,288,404,450,508,516,566,686,710,768,834;sexCode:0&DataMartId=POP&Format=csv&c=1,2,3,4,5,6,7,8,9,10,11,12,13,14,16,17,18&s=_countryEnglishNameOrderBy:asc,refYear:desc,areaCode:asc
// Relative to HP19, add PORTONOVO, KISANGANI, TEMA, MAKURU, NAMPULA, ZANZIBAR, DODOMA; drop ARUSHA, BLOEMFONTEIN, (East?) LONDON
gen byte big = inlist(city, "COTONOU","ABOMEY","DJOUGOU","PORTONOVO"     ) | ///
               inlist(city, "KINSHASA","LUBUMBASHI","MBUJIMAYI","KANANGA") | /// // DRC not in UNdata; https://www.geonames.org/CD/largest-cities-in-.html
               inlist(city, "KUMASI","ACCRA","TAMALE","TEMA"             ) | ///
               inlist(city, "NAIROBI","KISUMU","MOMBASA","NAKURU"        ) | ///
               inlist(city, "ANTANANARIVO"                               ) | ///
               inlist(city, "MAPUTO","NAMPULA","BEIRA","NAMPULA"         ) | ///
               inlist(city, "WINDHOEK","WALVISBAY"                       ) | ///
               inlist(city, "LAGOS","KANO","IBADAN","ABUJA"              ) | ///
               inlist(city, "DAKAR"                                      ) | ///
               inlist(city, "DARESSALAAM","MWANZA","ZANZIBAR","DODOMA"   ) | ///
               inlist(city, "LOME"                                       ) | ///
               inlist(city, "CAPETOWN","CLAREMONT","DURBAN","GREENPOINT","HELDERBERG","JOHANNESBURG","KWAZULU","LYNNWOOD","MARSHALLTOWN") | ///  // includes suburbs...
               inlist(city, "MEDUNSA","MIDRAND","MODDERFONTEIN","MPUMALANGA","NEWLANDS","PARKVIEW","PRETORIA","RIVONIA","ROSSLYN") | ///
               inlist(city, "SILVERTON","WAVERLEY","WYNBERG")

gen byte agree = (distanceNetHP < .005) == (distanceNetHPNew < .005) & (distanceNetHP < .1) == (distanceNetHPNew < .1)
sum agree if ip_count>10 // fraction agreeing; cited in text

* of the cities eligible for original regression,how many covered in revised geocoding?
preserve
collapse distanceNetHP distanceNetHPNew if ip_count>10, by(ISO2 city)
count
count if distanceNetHPNew<.
list if distanceNetHPNew==., sep(0)
restore

* "city" values in the sample that are also state names
* of these Kogi is dropped, as a singleton; Enugu, Kaduna, and Lagos are major cities; and Kwara is not
tab city if distanceNetHP<.1 & ip_count>10  & ISO2=="NG" & (inlist(city,"ABIA","ADAMAWA","ANAMBRA","BAUCHI","BENUE","BORNO","ENUGU","GOMBE") | inlist(city,"IMO","JIGAWA","KADUNA","KANO","KATSINA","KEBBI","KOGI","KWARA") | inlist(city,"LAGOS","NASARAWA","OGUN","ONDO","OSUN","OYO","SOKOTO"))

cap erase replication/Akamai.rtf
forvalues s=1/5 {
  local specname: word `s' of Orig Revised NewNet NewNetWide Node
  local depvar  : word `s' of asinhinternet loginternet loginternet loginternet loginternet
  local time    : word `s' of year time time time time
  local suffix  : word `s' of NetHP NetHPNew NetHPNew NetHPNew Node
  local Tlim    : word `s' of .005 .005 .05 .1 .1
  local Clim    : word `s' of .1   .1   .1  .2 .2
  local big     : word `s' of bigHP big big big big   // was all "bigHP" in Roodman (2024); fixed now
  local keepsing: word `s' of keepsingletons

  cap drop connected
  gen byte connected = distance`suffix' < `Tlim'

  eststo Akamai`specname'1: reghdfe `depvar' 1.submarines#1.connected if distance`suffix'<`Clim' & ip_count>10         , cluster(city) `keepsing' a(city countryid#`time')
  eststo Akamai`specname'2: reghdfe `depvar' 1.submarines#1.connected if distance`suffix'<`Clim' & ip_count>10 & !`big', cluster(city) `keepsing' a(city countryid#`time')
  eststo Akamai`specname'3: reghdfe `depvar' 1.submarines#1.connected if distance`suffix'<`Clim' & ip_count>10 & !`big', cluster(city) `keepsing' a(city countryid#`time' connected#`time')

  esttab Akamai`specname'1 Akamai`specname'2 Akamai`specname'3 using replication/Akamai.rtf, append ///
    b(3) se(3) scalars(mean) nolines nonotes nogap nonum nostar `=cond(`s'>1,"nomtitles","")' ///
    keep(1.submarines#1.connected) ///
    stat(N, label("Observations") fmt(%7.0fc)) fonttbl(\f0\fnil LM Roman 9;) msign("–")
}

* sensitivity to coding of 3 South African cities, all of which HP classify as largest four even though only Durban is
* look like large cities with bandwidth trends below national average--connected in my geocoding and not in HP's
cap drop connected
gen byte connected = cond(ISO2=="ZA" & inlist(city,"DURBAN", "LONDON", "MIDRAND"), distanceNetHP, distanceNetHPNew) < .005
reghdfe loginternet 1.submarines#1.connected if distanceNetHPNew<.1 & ip_count>10 & ISO2=="ZA", cluster(city) a(city countryid#time)
replace connected = distanceNetHPNew < .005
reghdfe loginternet 1.submarines#1.connected if distanceNetHPNew<.1 & ip_count>10 & ISO2=="ZA", cluster(city) a(city countryid#time)
line asinhinternet time if city=="DURBAN", sort(time) || line asinhinternet time if city=="LONDON", sort(time) || line asinhinternet time if city=="MIDRAND", sort(time) tline(2009q3) || if ip_count>10
gen _city = cond(inlist(city,"LONDON","MIDRAND","DURBAN"), city, "Rest of ZA")
// table submarines _city if ISO2=="ZA" & ip_count>10, stat(mean asinhinternet)  // screws up t-stat table

MapCoordinates Akamai

MapConnectedness Akamai

Fig4 if ip_count>10, timevar(year) revisedtimevar(time) cluster(city) revisedcluster(city) a(city) reviseda(city) dataset(Akamai) depvar(asinhinternet) reviseddepvar(loginternet) title(Akamai: Log Internet speed) xscale(off fill)
}


* Get 3G signal strength circa 2014 for Afrobarometer and DHS locations

// python
// import numpy as np, pandas as pd, pyodbc, rasterio.transform
// from pyproj import Transformer
//
// src = rasterio.open('D:/OneDrive - Open Philanthropy Project/Connectivity/Hjort & Poulsen/Collins Bartholomew/Mobile Coverage Explorer v2015 - GeoTIFF/Data_MCE/Global/MCE_Global3G_2015.tif')
// band=src.read(1)
//
// TRAN_4326_TO_3857 = Transformer.from_crs("EPSG:4326", "EPSG:3857")
//
// conn = pyodbc.connect(r'DSN=Hjort & Poulsen 2019;Trusted_Connection=yes;')
// with conn:
//   crsr = conn.cursor()
//
//   crsr.execute("SELECT distinct GE.DHSCC, GE.DHSYEAR, DHSCLUST, LATNUM, LONGNUM FROM DHS.Rounds join DHS.GE ON Rounds.DHSCC=GE.DHSCC AND Rounds.DHSYEAR=GE.DHSYEAR where latnum<>0 or longnum<>0")
//   r = [(loc.DHSCC, loc.DHSYEAR, loc.DHSCLUST, band[src.index(*TRAN_4326_TO_3857.transform(loc.LATNUM, loc.LONGNUM))]) for loc in crsr.fetchall()]
//   pd.DataFrame(r, columns=['DHSCC', 'DHSYEAR', 'DHSCLUST', '3G 2015']).to_csv('DHS coverage.csv', index=False, mode='w')
// 
//   crsr.execute("SELECT geoname_id, min(latitude) as lat, min(longitude) as long from Afrobarometer.[Survey data] group by geoname_id")
//   r = [(loc.geoname_id, band[src.index(*TRAN_4326_TO_3857.transform(loc.lat, loc.long))]) for loc in crsr.fetchall()]
//   pd.DataFrame(r, columns=['geoname_id', '3G 2015']).to_csv('Afrobarometer coverage.csv', index=False, mode='w')
//
//   crsr.execute("SELECT ea_code, Centroid.STY as lat, centroid.STX as long FROM QLFS.ea_rsa_2001")
//   r = [(loc.ea_code, band[src.index(*TRAN_4326_TO_3857.transform(loc.lat, loc.long))]) for loc in crsr.fetchall()]
//   pd.DataFrame(r, columns=['ea_code', '3G 2015']).to_csv('QLFS coverage.csv', index=False, mode='w')
//
//   crsr.execute("SELECT Worcode, lat, long FROM LMMIS.Geocodings")
//   r = [(loc.Worcode, band[src.index(*TRAN_4326_TO_3857.transform(loc.lat, loc.long))]) for loc in crsr.fetchall()]
//   pd.DataFrame(r, columns=['Worcode', '3G 2015']).to_csv('LMMIS coverage.csv', index=False, mode='w')
// end


***
*** DHS
***

{
cap noi odbc load, dsn("Hjort & Poulsen 2019") exec("select * from DHS.[analysis data] where latitude is not null and longitude is not null") clear
if !_rc saveold "replication/DHS", replace

use "replication/DHS" if inlist(ISO2,"BJ","CD","GH","KE","NA","NG","TZ","TG"), clear
cap label drop countryid
egen countryid = group(ISO2), label
gen interviewdate = ym(interviewyear, interviewmonth)
format %tm interviewdate
drop if employed==9

replace edcat = . if edcat > 3
label define edcats 0 "None" 1 "Primary" 2 "Secondary" 3 "Higher", replace
label values edcat edcats

label define strengths 1 "Strong" 2 "Variable" 3 "No signal/missing", replace
labe values cov3G strengths
gen byte strong3G = 1.cov3G

gen byte strongOokla = Fixed_speed_2019Q1>=256 & Fixed_speed_2019Q1<. | Mobile_speed_2019Q1>=256 & Mobile_speed_2019Q1<.

// import distance variable in email from Hjort Poulsen, 6/17/22
ren (DHSCLUST ISO2) (v001 country)
merge m:m country v001 year using "2022-06-17 data/data/dhs/dhs_distances", nogen keepusing(distance) keep(master match)
ren (v001 country distance) (DHSCLUST ISO2 distanceNetHP)

// incorporate file emailed by Hjort Poulsen, 6/17/22, defining retained sample; evidently drops obs that don't help identification
gen grid = string(10*round(latitude/.1)) + "_" + string(10*round(longitude/.1))
ren (year interviewyear) (_year year)  // merges on *interview* year, which is not always same as nominal survey year. Losing observations
merge m:1 grid year using "$home/2022-06-17 data/data/dhs/dhspanel10", gen(HPsample) keep(master match)
ren (_year year) (year interviewyear)
replace HPsample = HPsample==3

gen byte   skilled   = inlist(occ,1,2,3,5,7,8) & employed==1 if employed<.  // https://dhsprogram.com/data/Guide-to-DHS-Statistics/Employment_and_Occupation.htm
gen byte unskilled   = inlist(occ,4,6,9      ) & employed==1 if employed<.
gen byte   skilledHP = inlist(occ,1,2,3,5,7,8) if employed<. | inlist(occ,1,2,3,5,7,8)
gen byte unskilledHP = inlist(occ,4,6,9      ) if employed<. | inlist(occ,4,6,9      )

replace distanceNetHPNew = min(distanceNetHPNew, distanceNetHPNewNbr)
replace distanceNode = min(distanceNode, distanceNodeNbr)
gen long gridid1 = round(latitude, .1) * 10 * 1e3 + round(longitude, .1) * 10 + 1e6
gen long gridid2 = round(latitude, .2) * 10 * 1e3 + round(longitude, .2) * 10 + 1e6  // https://www0.gsb.columbia.edu/mygsb/faculty/research/pubfiles/19262/HjortPoulsen-AfricaInternet-091016.pdf#page=12

gen byte agree = (distanceNetHP < .005) == (distanceNetHPNew < .005) & (distanceNetHP < .1) == (distanceNetHPNew < .1)
sum agree  // fraction agreeing; cited in text

cap erase replication/DHS.rtf
forvalues s=1/5 {
  local specname: word `s' of Orig Revised NewNet NewNetWide Node
  local suffix  : word `s' of NetHP NetHPNew NetHPNew NetHPNew Node  // "HP" means HP fiber network definition; "New" means distances from that network recalculated here
  local Tlim    : word `s' of .005 .005 .05 .1 .1
  local Clim    : word `s' of .1   .1   .1  .2 .2
  local if      : word `s' of HPsample 1 1 1 1
  local clust   : word `s' of gridid1 gridid1 gridid1 gridid2 gridid2 // DHSCLUST DHSCLUST DHSCLUST DHSCLUST
  local gridid  : word `s' of gridid1 gridid1 gridid1 gridid2 gridid2
  local keepsing: word `s' of keepsingletons
  local year    : word `s' of year interviewyear interviewyear interviewyear interviewyear
  local skilled : word `s' of skilledHP skilled skilled skilled skilled

  cap drop connected
  gen byte connected = distance`suffix' < `Tlim'

  foreach basis in 3G Ookla {
    collect, name(tstats) tags(source[DHS] specname[`specname'] basis[`basis']                   ): reg strong`basis' connected if distance`suffix'<`Clim' & `if' & submarines & ("`basis'"=="Ookla" | inlist(ISO2,"GH","KE","MG","NG","TZ","ZA")), cluster(DHSCLUST)
    collect, name(tstats) tags(source[DHS] specname[`specname'] basis[`basis'] colname[connected]): ttest strong`basis' if e(sample), by(connected)
  }

  foreach depvar in employed `skilled' un`skilled' {
    eststo DHS`specname'`depvar': reghdfe `depvar' 1.submarines#1.connected if `if' & distance`suffix'<`Clim', `keepsing' cluster(`clust') a(`gridid'#connected countryid#`year')
 }

  estrot DHS`specname': reghdfe employed 1.submarines#1.connected#ibn.edcat i.edcat countryid#`year' if `if' & distance`suffix'<`Clim', `keepsing' cluster(`clust') a(`gridid'#connected)

  esttab DHS`specname'employed DHS`specname'`skilled' DHS`specname'un`skilled' DHS`specname'None DHS`specname'Primary DHS`specname'Secondary DHS`specname'Higher using replication/DHS.rtf, append ///
    b(3) se(3) scalars(mean) nolines nonotes nogap nonum nostar `=cond(`s'>1,"nomtitles","")' ///
    rename(1.submarines#1.connected#0.edcat `specname' 1.submarines#1.connected `specname') keep(`specname') ///
    stat(N, label("Observations") fmt(%7.0fc)) fonttbl(\f0\fnil LM Roman 9;) msign("–")
}

MapConnectedness DHS

Fig4 if HPsample, dataset(DHS) timevar(year) revisedtimevar(interviewyear) cluster(gridid1) revisedcluster(gridid1 /*DHSCLUST*/) a(gridid1#connected) reviseda(gridid1#connected) depvar(employed) reviseddepvar(employed) title(DHS: Employment) xscale(off fill)
}


* demonstration that Table 4, col. 2 isn't really identified
cap drop connected
gen byte connected = distanceNetHP < .005
reghdfe employed 1.submarines#1.connected connected#year if HPsample & distanceNetHP<.1, cluster(gridid1) a(gridid1#connected countryid#year               )
reghdfe employed 1.submarines#1.connected                if HPsample & distanceNetHP<.1, cluster(gridid1) a(gridid1#connected countryid#year connected#year)


***
*** Afrobarometer
***
{
// cap noi odbc load, dsn("Hjort & Poulsen 2019") exec("select * from Afrobarometer.[Analysis data]") clear
// if !_rc saveold "replication/Afrobarometer", replace

use "replication/Afrobarometer", clear

recode education educationHP   (0 1 2 = 0) (3 4 = 1) (5 = 2) (6 7 8 9 = 3) (* = .), gen(edcat edcatHP)
label define edcats 0 "None" 1 "Primary" 2 "Secondary" 3 "Higher", replace
label values edcat* edcats

label define strengths 1 "Strong" 2 "Variable" 3 "No signal/missing", replace
label values cov3G strengths
gen byte strong3G = 1.cov3G

gen byte strongOokla = Fixed_speed_2019Q1>=256 & Fixed_speed_2019Q1<. | Mobile_speed_2019Q1>=256 & Mobile_speed_2019Q1<.

gen byte employed = inlist(employment,2,3) if inlist(employment,1,2,3) // previously included those not looking; "if employment < 9"

gen long gridid1   = round(latitude  , .1) * 10 * 1e3 + round(longitude  , .1) * 10 + 1e6
gen long gridid1HP = round(latitudeHP, .1) * 10 * 1e3 + round(longitudeHP, .1) * 10 + 1e6
gen long gridid2   = round(latitude  , .2) * 10 * 1e3 + round(longitude  , .2) * 10 + 1e6

replace distanceNetHPNew = min(distanceNetHPNew, distanceNetHPNewNbr)
recode distanceNetHP (-1 = .)

format %td date
gen int year = year(date) - (ISO2=="NG" & year(date)>2012)  // recode a few Nigeria obs in January to previous year ***TODO: confirm/copy HP in assigning year=2008 to some KE obs with missing date*** 
replace yearHP = cond(date==., yearHP, year(date))

gen byte sampleNetHP = distanceNetHP<.1
gen byte sampleNetHPNew = distanceNetHPNew<.1
egen countryid = group(ISO2)
egen geogid = group(ISO2 region district townvill)
gen dailyinternet = 4.useinternet
gen weeklyinternet = 3.useinternet | dailyinternet  // 2nd term new in Aug 2025

* mark HP19 sample
gen grid = string(10*round(latitudeHP/.1)) + "_" + string(10*round(longitudeHP/.1))
ren (year yearHP) (_year year)
merge m:1 grid year using "$home/2022-06-17 data/data/afro/afropanel10", gen(HPsample)
ren (_year year) (year yearHP)
drop grid
replace HPsample = HPsample==3

gen byte agree = (distanceNetHP < .005)==(distanceNetHPNew < .005) & (distanceNetHP < .1)==(distanceNetHPNew < .1)
sum agree  // fraction agreeing; cited in text

cap erase replication/Afro.rtf
forvalues s=1/5 {
  local specname: word `s' of Orig Revised NewNet NewNetWide Node
  local suffix  : word `s' of NetHP NetHPNew NetHPNew NetHPNew Node
  local HP      : word `s' of HP
  local if      : word `s' of "HPsample & ageHP<65" "age<65 & 1.precision_code" "age<65 & 1.precision_code" "age<65 & 1.precision_code" "age<65 & 1.precision_code"
  local clust   : word `s' of gridid1HP gridid1 gridid1 gridid2 gridid2 // geogid geogid geogid geogid
  local gridid  : word `s' of gridid1HP gridid1 gridid1 gridid2 gridid2
  local Tlim    : word `s' of .005 .005 .05 .1 .1
  local Clim    : word `s' of .1   .1   .1  .2 .2
  local keepsing: word `s' of keepsingletons

  cap drop connected
  gen byte connected = distance`suffix' < `Tlim'

  foreach basis in 3G Ookla {  // for t stat table
    collect, name(tstats) tags(source[Afrobarometer] specname[`specname'] basis[`basis']                   ): reg strong`basis' connected if distance`suffix'<`Clim' & `if' & submarines & ("`basis'"=="Ookla" | inlist(ISO2,"GH","KE","MG","NG","TZ","ZA")), cluster(geogid)
    collect, name(tstats) tags(source[Afrobarometer] specname[`specname'] basis[`basis'] colname[connected]): ttest strong`basis' if e(sample), by(connected)
  }

  eststo Afro`specname'employed: reghdfe employed`HP' 1.submarines#1.connected               if `if' & distance`suffix'<`Clim', `keepsing' a(`gridid'#connected countryid#year`HP'      ) cluster(`clust')
  estrot Afro`specname'        : reghdfe employed`HP' 1.submarines#1.connected#ibn.edcat`HP' if `if' & distance`suffix'<`Clim', `keepsing' a(`gridid'#connected countryid#year`HP' edcat`HP') cluster(`clust')

  eststo Afro`specname'dayinternet : reghdfe dailyinternet  1.submarines#1.connected if `if' & distance`suffix'<`Clim', `keepsing' a(`gridid'#connected countryid#year`HP'      ) cluster(`clust')
  eststo Afro`specname'weekinternet: reghdfe weeklyinternet 1.submarines#1.connected if `if' & distance`suffix'<`Clim', `keepsing' a(`gridid'#connected countryid#year`HP'      ) cluster(`clust')

  esttab Afro`specname'employed Afro`specname'None Afro`specname'Primary Afro`specname'Secondary Afro`specname'Higher Afro`specname'dayinternet Afro`specname'weekinternet using replication/Afro.rtf, append ///
    b(3) se(3) scalars(mean) nolines nonotes nogap nonum nostar `=cond(`s'>1,"nomtitles","")' ///
    rename(1.submarines#1.connected#0.edcat`HP' `specname' 1.submarines#1.connected `specname') keep(`specname') ///
    stat(N, label("Observations") fmt(%7.0fc)) fonttbl(\f0\fnil LM Roman 9;) msign("–")
}

* cited in text: Revised specification 0.005/0.1 specification but excluding those not looking for work
replace connected = distanceNetHPNew < .005
reghdfe employedHP 1.submarines#1.connected if age<65 & 1.precision_code & distanceNetHPNew<.1, a(gridid1#connected countryid#year) cluster(gridid1)

* compare geocodings
MapCoordinates Afro

* maps comparing connectedness status
MapConnectedness Afro

Fig4 if HPsample & ageHP<65, dataset(Afro) timevar(yearHP) revisedif(& age<65 & 1.precision_code) revisedtimevar(year) cluster(gridid1HP) revisedcluster(gridid1 /*geogid*/) a(gridid1HP#connected) reviseda(gridid1#connected) depvar(employedHP) reviseddepvar(employed) title(Afrobarometer: Employment)
}

* demonstration that Table 4, col. 2 isn't really identified
cap drop connected
gen byte connected = distanceNetHP < .005
reghdfe employed 1.submarines#1.connected connected#year if HPsample & distanceNetHP<.1, cluster(gridid1) a(gridid1#connected countryid#year               )
reghdfe employed 1.submarines#1.connected                if HPsample & distanceNetHP<.1, cluster(gridid1) a(gridid1#connected countryid#year connected#year)


***
*** South Africa QLFS
***

* import public QLFS files--missing enumeration area identifiers, but those are present in HP19 copy
// foreach file in `:dir QLFS files "*.dta"' {
//   use "QLFS/`file'", clear
//   compress
//   gen year = `=substr("`file'", 6, 4)'
//   gen byte quarter = `=substr("`file'", 11, 2)'
//   odbc insert, dsn("Hjort & Poulsen 2019") table(QLFS.QLFS) insert
// }

{
* dimension of smallest enumeration area if it's square with same area = .034 km
cap odbc load, clear dsn("Hjort & Poulsen 2019") exec("select top 1 sqrt(ogr_geometry.STArea()*1.22364E+4) as r from QLFS.ea_rsa_2001 order by ogr_geometry.STArea()")
cap di r

cap odbc load, clear dsn("Hjort & Poulsen 2019") exec("select * from QLFS.[Analysis data] where time<20103")
if !_rc saveold "replication/QLFS", replace

use "replication/QLFS", clear
replace distanceNetHPNew = min(distanceNetHPNew, distanceNetHPNewNbr)
replace distanceNode = min(distanceNode, distanceNodeNbr)
gen byte connectedNetHP = distanceNetHP < .005
gen byte connectedNet  = distanceNet < .005
gen byte connectedNode = distanceNode < .1
gen byte submarines = time>=20093

gen byte   skilled = inlist(occup, 1, 2, 3, 4, 5, 6, 7, 8) & employed==1 if employed<.
gen byte unskilled = inlist(occup, 9, 10                 ) & employed==1 if employed<.

recode educatio (1 2 = 0) (3 4 = 1) (5 = 2) (6 = 3) (* = .), gen(edcat)
label define edcats 0 "None" 1 "Primary" 2 "Secondary" 3 "Higher", replace
label values edcat edcats

label define strengths 1 "Strong" 2 "Variable" 3 "No signal/missing", replace
labe values cov3G strengths
gen byte strong3G = 1.cov3G

gen byte strongOokla = Fixed_speed_2019Q1>=256 & Fixed_speed_2019Q1<. | Mobile_speed_2019Q1>=256 & Mobile_speed_2019Q1<.

replace time = yq(floor(time/10), mod(time,10))  // put quarterly time in Stata format
format %tq time

gen byte agree = (distanceNetHP < .005) == (distanceNetHPNew < .005) & (distanceNetHP < .1) == (distanceNetHPNew < .1)
sum agree  // fraction agreeing; cited in text

cap erase replication/QLFS.rtf
forvalues s=1/5 {
  local specname: word `s' of Orig Revised NewNet NewNetWide Node
  local suffix  : word `s' of NetHP NetHPNew NetHPNew NetHPNew Node
  local Tlim    : word `s' of .005 .005 .05 .1 .1
  local Clim    : word `s' of .1   .1   .1  .2 .2
  local keepsing: word `s' of keepsingletons

  cap drop connected
  gen byte connected = distance`suffix' < `Tlim'

  foreach basis in 3G Ookla {
    collect, name(tstats) tags(source[QLFS] specname[`specname'] basis[`basis']                   ): reg strong`basis' connected if distance`suffix'<`Clim' & submarines, cluster(eacode)
    collect, name(tstats) tags(source[QLFS] specname[`specname'] basis[`basis'] colname[connected]): ttest strong`basis' if e(sample), by(connected)
  }

  foreach depvar in employed skilled unskilled {
    eststo QLFS`specname'`depvar': reghdfe `depvar' 1.submarines#1.connected                   if distance`suffix'<`Clim', `keepsing' a(time eacode) cluster(eacode)
 }
  estrot QLFS`specname'          : reghdfe employed 1.submarines#1.connected#ibn.edcat i.edcat if distance`suffix'<`Clim', `keepsing' a(time eacode) cluster(eacode)

  esttab QLFS`specname'employed QLFS`specname'skilled QLFS`specname'unskilled QLFS`specname'None QLFS`specname'Primary QLFS`specname'Secondary QLFS`specname'Higher using replication/QLFS.rtf, append ///
    b(3) se(3) scalars(mean) nolines nonotes nogap nonum nostar `=cond(`s'>1,"nomtitles","")' ///
    rename(1.submarines#1.connected#0.edcat `specname' 1.submarines#1.connected `specname') keep(`specname') ///
    stat(N, label("Observations") fmt(%7.0fc)) fonttbl(\f0\fnil LM Roman 9;) msign("–")
}

* year-specific coefficients
cap drop connected
gen byte connected = distanceNode<.1
reghdfe employed time#1.connected if distanceNode<.2, a(time eacode) cluster(eacode) nocons
mata st_matrix("e(at)", `=tq(2008q1)'..`=tq(2010q2)')
coefplot, omitted base at xlab(,format(%tq)) xline(`=tq(2009q2)+.5', lcolor(gs8) lpat(solid)) tlab(2008q1(1)2010q2, nogrid) ///
  text(-.049 `=tq(2009q2)+.5' "Undersea cable connection", place(ne) size(medsmall)) name(QLFSbyYear, replace)
graph export Replication/QLFSbyYear.png, replace width(2000)

gen ISO2 = "ZA"
gen country_name = ""
gen countryid = 1
MapConnectedness QLFS

Fig4, dataset(QLFS) timevar(time) revisedtimevar(time) cluster(eacode) revisedcluster(eacode) a(eacode) reviseda(eacode) depvar(employed) reviseddepvar(employed) title(South Africa QLFS: Employment)
}


***
*** Ethiopia
***
{
cap odbc load, dsn("Hjort & Poulsen 2019") exec("select * from LMMIS.[Analysis data]") clear
if !_rc saveold "replication/LMMIS", replace

use "replication/LMMIS", clear
gen long gridid1 = round(latitude, .1) * 10 * 1e3 + round(longitude, .1) * 10 + 1e6
gen long gridid2 = round(latitude, .2) * 10 * 1e3 + round(longitude, .2) * 10 + 1e6
gen byte submarines = year>=2009

gen byte strongOokla = Fixed_speed_2019Q1>=256 & Fixed_speed_2019Q1<. | Mobile_speed_2019Q1>=256 & Mobile_speed_2019Q1<.

gen byte agree = (distanceNetHP < .005) == (distanceNetHPNew < .005) & (distanceNetHP < .1) == (distanceNetHPNew < .1)
sum agree  // fraction agreeing; cited in text

cap erase replication/LMMIS.rtf
forvalues s=1/5 {
  local specname: word `s' of Orig Revised NewNet NewNetWide Node
  local suffix  : word `s' of NetHP NetHPNew NetHPNew NetHPNew Node
  local clust   : word `s' of gridid1HP gridid1 gridid1 gridid2 gridid2 // Worcode Worcode Worcode Worcode
  local gridid  : word `s' of gridid1HP gridid1 gridid1 gridid2 gridid2
  local Tlim    : word `s' of .005 .005 .05 .1 .1
  local Clim    : word `s' of .1   .1   .1  .2 .2
  local keepsing: word `s' of keepsingletons

  cap drop connected
  gen byte connected = distance`suffix' < `Tlim'

  foreach basis in Ookla {
    collect, name(tstats) tags(source[LMMIS] specname[`specname'] basis[`basis']): reg strong`basis' connected if distance`suffix'<`Clim' & submarines, cluster(Worcode)
    collect, name(tstats) tags(source[LMMIS] specname[`specname'] basis[`basis'] colname[connected]): ttest strong`basis' if e(sample), by(connected)
  }

  forvalues f=1/2 {
    local FEs: word `f' of "year Id" "`gridid'#connected indcode#year"
    foreach depvar in l s u {
      eststo LMMIS`f'`specname'`depvar': reghdfe `depvar' 1.submarines#1.connected if distance`suffix'<`Clim', a(`FEs') cluster(`clust') `keepsing'
    }
  }
//   reghdfe y k 1.submarines#1.connected##c.(u s) if distance`suffix'<`Clim', a(`FEs') cluster(`clust') `keepsing'  // HP19 Table 8, col. 7

  esttab LMMIS1`specname'l LMMIS2`specname'l LMMIS1`specname's LMMIS2`specname's LMMIS1`specname'u LMMIS2`specname'u using replication/LMMIS.rtf, append ///
    b(3) se(3) scalars(mean) nolines nonotes nogap nonum nostar `=cond(`s'>1,"nomtitles","")' ///
    rename(1.submarines#1.connected#0.edcat `specname' 1.submarines#1.connected `specname') keep(`specname' /*1.submarines#1.connected#c.u 1.submarines#1.connected#c.s*/) ///
    stat(N, label("Observations") fmt(%7.0fc)) fonttbl(\f0\fnil LM Roman 9;) msign("–")
}

* year-specific coefficients
cap drop connected
gen byte connected = distanceNetHP<.005
reghdfe l year#1.connected if distanceNetHP<.1, a(year Id) cluster(Worcode) nocons
mata st_matrix("e(at)", 2006..2013)
coefplot, omitted base at xline(2009.5, lcolor(gs8) lpat(solid)) xlab(2006/2013, nogrid) ///
  text(0 2009.5 "Undersea cable connection", place(ne) size(medsmall)) name(LMMISbyYear, replace)
graph export replication/LMMISbyYear.png, replace width(2000)

gen ISO2 = "ET"
gen country_name = ""
gen countryid = 1
MapConnectedness LMMIS

Fig4, dataset(LMMIS) timevar(year) revisedtimevar(year) cluster(gridid1HP) revisedcluster(gridid1 /*Worcode*/) a(Id) reviseda(Id) depvar(l) reviseddepvar(l) title(Ethiopia LMMIS: Log firm employment)
}


***
*** table of treatment-control t stats in strong 3G coverage/Ookla speed
***
{
collect label levels result mu_1 "Controls" mu_2 "Treated" N_1 "N" N_2 "N", modify
collect style cell result[mu_1 mu_2 _r_z], nformat(%3.2f) name(tstats)
collect style cell result[N_1 N_2], nformat(%8.0gc) name(tstats)
collect style cell, font("LM Roman 9", size(8)) border(right, pattern(nil))
collect style cell cell_type[column-header corner], border(top, pattern(nil)) name(tstats)
collect style header colname, level(hide)
collect style row split, dups(center)
collect style column, dups(center)
collect layout (colname[connected]#specname#basis) (source#result[mu_2 N_2 mu_1 N_1 _r_z]), name(tstats)
collect export replication/tstats.docx, replace
}


***
*** Meta-analysis
***

forvalues s=1/5 {
  local suffix: word `s' of Orig Revised NewNet NewNetWide Node
  local title: word `s' of "Original, 0.005°/0.1° from lines" "Revised, 0.005°/0.1° from lines" "Revised, 0.05°/0.1° from lines" "Revised, 0.1°/0.2° from lines" "Revised, 0.1°/0.2° from nodes"
  cwf default
  cap frame drop meta
  frame create meta str10(DataSource) long(N) double(es se)
  foreach dataset in DHS Afro QLFS {
    est restore `dataset'`suffix'employed
    frame post meta ("`dataset'") (e(N)) (_b[1.submarines#1.connected]) (_se[1.submarines#1.connected])
  }
  cwf meta
  meta set es se, studylabel(DataSource)
  meta summarize
  di "Standard error = " r(se)
  meta forestplot, columnopts(_esci, format(%4.3f)) markeropts(msize(small)) ciopts(lcolor(red)) noohetstats nonotes noohomtest noosigtest ///
    xscale(off) graphregion(margin(0 0 0 2)) name(Meta`suffix', replace) nullrefline scheme(lean1) xlab(none) xscale(range(-.08 .22)) title("`title'", pos(11))
  graph export Replication/Meta`suffix'.png, replace width(2000)
}
cwf default


***
*** Figure 4 replications
***

grc1leg2 Fig4Akamai Fig4DHS Fig4Afro Fig4QLFS Fig4LMMIS, lrows(1) graphregion(margin(zero)) legscale(*1) name(Fig4, replace) imargin(3 3 0 0) ring(0) pos(5) lxoffset(-10) lyoffset(20)
graph export Replication/Fig4.png, width(2000) replace


***
*** WBES
***
{
use WBES/New_Comprehensive_March_31_2022 if region==1, clear
rename a3ax city

gen year = real(substr(country, strlen(country)-3, .))
replace country = substr(country, 1, strlen(country)-4)

drop if year>2016 | inlist(country, "Namibia", "DRC", "Cameroon", "Madagascar", "Mauritius", "Malawi", "Southsudan", "Sudan", "Uganda")  // 2016 is a guess; adding 2018-19 adds power

merge 1:1 idstd year using "WBES/Firm Level TFP Estimates and Factor Ratios_March_31_2022", keep(match) nogen

replace city = "Dar es Salaam" if city == "Dar Es Salaam"
replace city = "Thiès" if regexm(city, "Th.*s")
replace city = "Addis Ababa" if city=="Addisababa"
replace city = "Cross River" if city=="Cross river"
replace city = "Oromia" if city=="Oromya"
replace city = "Snnpr" if city=="Snnp"
replace city = "Enugu" if city=="Eungu"

gen int time = yq(year, floor((a14m-1)/3) + 1)  
gen byte submarines = time >= tq(2009q3) & inlist(country, "Kenya", "Mozambique", "Tanzania", "SouthAfrica") | ///
                      time >= tq(2010q3) & inlist(country, "BurkinaFaso", "Benin", "Côte d'Ivoire", "Ghana", "Nigeria", "Senegal", "Togo") | ///
                      time >= tq(2011q2) & inlist(country, "Mauritania") | ///
                      time >= tq(2012q2) & inlist(country, "Rwanda", "Zimbabwe", "Angola") | ///
                      time >= tq(2012q4) & inlist(country, "Gambia", "Guinea", "Liberia", "SierraLeone")

egen countryid = group(country)
gen logemployees = asinh(l1) if l1>=0
gen logexports   = asinh(d3c / 100 * d2_gdp09) if d3c >= 0
gen logsales     = asinh(d2_n2a) // the other ones have too many missing values
gen byte training = l10==1
gen byte email   = c22a == 1 if inlist(c22a, 1, 2)
gen byte website = c22b == 1 if inlist(c22b, 1, 2)

gen byte connectedHP = inlist(city, "Anambra", "Niger", "Dar es Salaam", "Nouadhibou", "Nouakchott", "Saint-Louis", "Thiès", "Dakar") | ///
                       inlist(city, "Takoradi", "Accra", "Tema", "North", "Lagos", "Oyo", "Sokoto", "Kano", "Kaduna") | ///
                       inlist(city, "Abuja", "Eungu", "Nairobi", "Central", "Nakuru", "Nyanza", "Mombasa", "Arusha", "Mwanza") | ///
                       inlist(city, "Mbeya", "Kaolack")

* Tamale, Kumasi, Katsina, Pemba, Kisumu (and Abia??) look connected on the map while Nyanza doesn't

gen byte connected = inlist(city, "Tamale", "Kumasi", "Katsina", "Pemba", "Kisumu") | ///
                     inlist(city, "Anambra","Dar es Salaam", "Nouadhibou", "Nouakchott", "Saint-Louis", "Thiès", "Dakar") | ///
                     inlist(city, "Takoradi", "Accra", "Tema", "Lagos", "Oyo", "Sokoto", "Kano", "Kaduna") | ///
                     inlist(city, "Abuja", "Enugu", "Nairobi", "Nakuru", "Mombasa", "Arusha", "Mwanza") | ///
                     inlist(city, "Mbeya", "Kaolack")

eststo WBESOrig:reghdfe logemployees 1.submarines#1.connectedHP         , cluster(city) a(city countryid##year isic      )  // Table A2, col. 1, approx
                reghdfe logemployees 1.submarines#1.connectedHP         , cluster(city) a(city countryid##year isic##year)  // Table A2, col. 2, approx
eststo WBESNew: reghdfe logemployees 1.submarines#1.connected [pw=wt_rs] if country!="Nigeria", cluster(city) a(city countryid##year isic)  // no id without Nigeria
esttab WBESOrig WBESNew using replication/WBES.rtf, replace se nolines nonotes nocons nogap mtitles nonum nostar rename(1.submarines#1.connectedHP Treatment 1.submarines#1.connected Treatment) keep(Treatment) stat(N, label("Observations") fmt(%7.0fc)) msign("–")

tab city if country=="Nigeria"  // state names (Abuja ~= Federal Capital Territory)
}


***
*** light
***

* Extract satellite data to csv for import into SQL Server
{
// python
// import numpy as np
// import pandas as pd
// import os, glob, pyodbc, shapely.wkt, rasterio.mask, rasterio.transform
//
// os.chdir(r'$home/DMSP')
// try:
//   os.remove('light.csv')
// except:
//   pass
//
// conn = pyodbc.connect(r'DSN=Hjort & Poulsen 2019;Trusted_Connection=yes;')
// crsr = conn.cursor()
// crsr.execute("select iso2, ogr_geometry.STAsText() as border, minlong, minlat, maxlong, maxlat from [Africa countries] where iso2 in ('BJ','CD','NG','NA','SN','TZ','TG','GH','KE','MG','MZ','ZA', 'BF','BI','MW','ML','NE','RW','UG','ZM','ZW')")  # bounding boxes for countries of interest
//
// src = {}; srccal = {}
// for y in range(2007,2014):
//   src[y]    = rasterio.open(glob.glob('F1?%d.v4?.global.stable_lights.avg_vis.tif'          % y)[0])  # https://eogdata.mines.edu/wwwdata/dmsp/v4composites_rearrange
//   srccal[y] = rasterio.open(glob.glob('F1?%d.v4?.global.intercal.stable_lights.avg_vis.tif' % y)[0])
//
// for borderrec in crsr.fetchall():
//   corner1x, corner1y = src[2007].index(borderrec.minlong, borderrec.minlat)  # indexes in geotiff array for corners of each country's bounding box
//   corner2x, corner2y = src[2007].index(borderrec.maxlong, borderrec.maxlat)
//   rows, cols = np.meshgrid(np.arange(corner1x, corner2x, -1), np.arange(corner1y, corner2y))  # indexes of all pixels in box
//   long, lat = rasterio.transform.xy(src[2007].transform, rows, cols)  # corresponding long, lats
//   long, lat = np.concatenate(long), np.concatenate(lat)
//   df = pd.DataFrame({"iso2": np.full(lat.shape, borderrec.iso2), "longitude": long, "latitude": lat})  # make DataFrame: country code, lat, long, but no light data yet
//   wkt = shapely.wkt.loads(borderrec.border)  # get country border
//
//   for y in range(2007,2014):
//     masked = np.squeeze(rasterio.mask.mask(src[y], [wkt], nodata=128)[0])     # recode pixels inside bounding box but outside border to 128
//     df['light%u' % y] = masked[rows,cols].ravel()                             # store in DataFrame
//     masked = np.squeeze(rasterio.mask.mask(srccal[y], [wkt], nodata=128)[0])  # same for calibrated data
//     df['lightcal%u' % y] = masked[rows,cols].ravel()
// 
//   df = df.loc[df['light2007'] != 128]  # drop pixels outside borders
//   df.to_csv('light.csv', index=False, header=False, mode='a')  # NB: longitude first
// end
}


* Prep raw light data
cap noi {
  odbc load, dsn("Hjort & Poulsen 2019") exec(`"select * from DMSP.[analysis data`=cond("`data'"=="cal",", calibrated","")'] where distanceNet<.2 or distanceNode<.2"') clear
  gen byte experiment = inlist(ISO2,"BJ","CD","NG","NA","SN","TZ") | inlist(ISO2,"TG","GH","KE","MG","MZ","ZA")
  gen byte submarines = year>=2009 & inlist(ISO2,"KE","MZ","ZA","TZ") | year>=2010 & inlist(ISO2,"BJ","GH","MG","NG","SN","TG") | year>=2012 & inlist(ISO2,"CD","NA")
  gen long gridid1 = round(latitude, .1) * 10 * 1e3 + round(longitude, .1) * 10 + 1e6
  gen long gridid2 = round(latitude, .2) * 10 * 1e3 + round(longitude, .2) * 10 + 1e6
  cap label drop countryid
  egen byte countryid = group(ISO2), label
  recode lightcal (128 = .)
  gen lnlight = asinh(light)
  gen lnlightcal = asinh(lightcal)
  replace distanceNet = distanceNetNbr
  saveold "replication/DMSP", replace ver(12)
}

* Original regression--Table 9, col. 1
{
use "$HPdata/data/light" if distance<.1 & (inlist(country,"Benin","Democratic Republic of Congo","Nigeria","Namibia","Senegal") | inlist(country,"Tanzania","Togo","Ghana","Kenya","Madagascar","Mozambique","South Africa")), clear

gen byte submarinesHP = ///  // bug in original: yields values >1
  (year>=2009 & inlist(country,"Kenya","Mozambique","Tanzania","South Africa")) + ///
  (year>=2009 & inlist(country,"Kenya")) + ///
  (year>=2010 & inlist(country,"Kenya","Madagascar","Mozambique","Tanzania","Benin","Ghana","Nigeria","Senegal","Togo")) + ///
  (year>=2011 & inlist(country,"Ghana","Nigeria","Senegal")) + ///
  (year>=2012 & inlist(country,"Kenya")) + ///
  (year>=2012 & inlist(country,"Kenya","South Africa","Democratic Republic of Congo","Ghana","Namibia","Nigeria","Togo")) + ///
  (year>=2012 & inlist(country,"Benin","Democratic Republic of Congo","Ghana","Namibia","Nigeria","Senegal"))
gen byte submarines = year>=2009 & inlist(country,"Kenya","Mozambique","South Africa","Tanzania") | ///
                      year>=2010 & inlist(country,"Benin","Ghana","Madagascar","Nigeria","Senegal","Togo") | ///
                      year>=2012 & inlist(country,"Democratic Republic of Congo","Namibia")

gen byte connected = distance<.005
gen lnlight = asinh(light)
gen long gridid1 = round(latitude, .1) * 10 * 1e3 + round(longitude, .1) * 10 + 1e6
egen countryid = group(country)

eststo clear
forvalues s=1/2 {
  local FE: word `s' of "" connected#year
  eststo Light`s'Origreghdfe: reghdfe lnlight c.submarinesHP#1.connected, a(gridid1#connected countryid#year `FE') cluster(gridid1) keepsingletons
//   eststo Light`s'Origppmlhdfe: ppmlhdfe light c.submarines#1.connected, a(`FE') cluster(gridid1)
}
esttab, replace b(3) se(3) nolines nonotes nocons nogap nomtitles nonum nostar ///
       rename(1.connected#c.submarinesHP Treatment) keep(Treatment) ///
       stat(N, label("Observations") fmt(%10.0fc)) msign("–") fonttbl(\f0\fnil LM Roman 9;)
}

* new regressions
{
use "replication/DMSP" if experiment, clear
gen byte connectedNet  = distanceNet < .1
gen byte connectedNode = distanceNode < .1
gen byte F18 = year>2009

cap erase replication/light.rtf
forvalues s=1/4 {
  local specname: word `s' of Revised NewNet NewNetWide Node
  local suffix  : word `s' of Net Net Net Node
  local gridid  : word `s' of gridid1 gridid1 gridid2 gridid2
  local Tlim    : word `s' of .005 .05 .1 .1
  local Clim    : word `s' of .1   .1  .2 .2
  cap drop connected
  gen byte connected = distance`suffix' < `Tlim'

  eststo clear
  foreach cal in "" cal {
    forvalues e=1/1 /*2*/ {
      local cmd: word `e' of reghdfejl ppmlhdfe
      local depvar: word `e' of lnlight`cal' light`cal'
      local estname: word `e' of ols pois
      forvalues f=1/3 {
        local FE: word `f' of "" connected#F18 connected#year
        eststo: `cmd' `depvar' 1.submarines#1.connected if distance`suffix'<`Clim', a(`gridid'#connected countryid#year `FE') cluster(`gridid') nosamp
      }
    }
  }
  esttab using replication/light.rtf, append ///
         b(3) se(3) nolines nonotes nocons nogap nomtitles nonum nostar ///
         rename(1.submarines#1.connected Treatment) keep(Treatment) ///
         stat(N, label("Observations") fmt(%10.0fc)) msign("–") fonttbl(\f0\fnil LM Roman 9;)
}
}

* country-specific estimates
{
foreach cal in "" cal {
  forvalues e=1/1 /*2*/ {
    local cmd: word `e' of reghdfejl ppmlhdfe
    local depvar: word `e' of lnlight`cal' light`cal'
    forvalues n=1/2 {
      local geom: word `n' of Net Node
        eststo `geom'`cal'All`cmd'   : `cmd' `depvar' 1.connected`geom'#ibn.year if distance`geom'<.2, a(gridid2#connected`geom' countryid#year) cluster(gridid2) nocons nosamp tol(1e-10)
      foreach ISO2 in BJ CD NG NA SN TZ TG GH KE MG MZ ZA {
        eststo `geom'`cal'`ISO2'`cmd': `cmd' `depvar' 1.connected`geom'#ibn.year if distance`geom'<.2 & ISO2=="`ISO2'", a(gridid2#connected`geom' countryid#year) cluster(gridid2) nocons
      }
    }
  }
}


foreach cal in "" cal {
  forvalues n=1/2 {
    local geom: word `n' of Net Node
    local ytitle = cond("`cal'"=="", `"Using distance from `:word `n' of lines nodes'"', " ")
    local title = cond(`n'==1, `"Using `=cond("`cal'"=="", "raw", "calibrated")' light values"', " ")
    coefplot (`geom'`cal'Allreghdfejl, lab(All)) (`geom'`cal'BJreghdfejl, lab(Benin)) (`geom'`cal'CDreghdfejl, lab(DRC)) (`geom'`cal'GHreghdfejl, lab(Ghana)) (`geom'`cal'KEreghdfejl, lab(Kenya)) (`geom'`cal'MGreghdfejl, lab(Madagascar)) (`geom'`cal'MZreghdfejl, lab(Mozambique)) (`geom'`cal'NAreghdfejl, lab(Namibia)) (`geom'`cal'NGreghdfejl, lab(Nigeria)) (`geom'`cal'SNreghdfejl, lab(Senegal)) (`geom'`cal'ZAreghdfejl, lab(South Africa)) (`geom'`cal'TZreghdfejl, lab(Tanzania)) (`geom'`cal'TGreghdfejl, lab(Togo)), ///
      name(annual`geom'`cal', replace) rename("1.connected`geom'#([0-9]+).year$" = \1, regex) omitted drop(_cons) vertical grid(between) msize(vsmall) ///
      `=cond(`n'==1, `"title(`title', size(medium))"', "")' ///
      `=cond("`cal'"=="", `"ytitle(`ytitle', size(medium))"', "")' ///
      xlab(, notick labsize(medsmall)) ylab(, format(%2.1f)) xline(3.5, lcolor(gs8) lpat(solid)) ///
      `=cond("`geom'`cal'"=="Net", `"text(-.3 3.5 "Satellite switch", place(ne) size(small))"', "")' ///
      `=cond(`n'==1,"xscale(off fill)","")' `=cond("`cal'"=="","","yscale(off fill)")' ///
      legend(keygap(1pt)) graphregion(margin(zero)) /*nodraw*/
  }
}
grc1leg2 annualNet annualNetcal annualNode annualNodecal, lrows(1) cols(2) graphregion(margin(zero)) legscale(*1) ycommon name(LightByCountry, replace) imargin(zero)
graph export Replication/LightByCountry.png, width(2000) replace

foreach cal in "" cal {
  forvalues n=1/2 {
    local geom: word `n' of Net Node
    local ytitle = cond("`cal'"=="", `"Using distance from `:word `n' of lines nodes'"', " ")
    local title = cond(`n'==1, `"Using `=cond("`cal'"=="", "raw", "calibrated")' light values"', " ")
    coefplot (`geom'`cal'Allreghdfejl, lab(All)), ///
      name(annual`geom'`cal', replace) rename("1.connected`geom'#([0-9]+).year$" = \1, regex) omitted drop(_cons) vertical grid(between) msize(vsmall) ///
      `=cond(`n'==1, `"title(`title', size(medium))"', "")' ///
      `=cond("`cal'"=="", `"ytitle(`ytitle', size(medium))"', "")' ///
      xlab(, notick labsize(medsmall)) ylab(, format(%3.2f)) xline(3.5, lcolor(gs8) lpat(solid)) ///
      `=cond("`geom'`cal'"=="Net", `"text(-.1 3.5 "Satellite switch", place(ne) size(small))"', "")' ///
      `=cond(`n'==1,"xscale(off fill)","")' `=cond("`cal'"=="","","yscale(off fill)")' ///
      nodraw
  }
}
graph combine annualNet annualNetcal annualNode annualNodecal, cols(2) graphregion(margin(zero)) ycommon name(Light, replace) imargin(zero)
graph export Replication/Light.png, width(2000) replace

gen byte connectedNetHP = distanceNet < .005
foreach cal in "" cal {
  reghdfejl lnlight`cal' 1.connectedNetHP#ibn.year if distanceNet<.1, a(gridid1#connectedNetHP countryid#year) cluster(gridid1) nocons nosamp tol(1e-10)
  coefplot, ///
    name(annualNetHP`cal', replace) rename("1.connectedNetHP#([0-9]+).year$" = \1, regex) omitted drop(_cons) vertical grid(between) msize(vsmall) ///
    title(Using `=cond("`cal'"=="", "raw", "calibrated")' light values, size(medium)) ///
    xlab(,notick) ylab(, format(%3.2f)) xline(3.5, lcolor(gs8) lpat(solid)) ///
    `=cond("`cal'"=="", `"text(-.08 3.5 "Satellite switch", place(ne) size(small))"', "yscale(off fill)")' nodraw
}
graph combine annualNetHP annualNetHPcal, cols(2) graphregion(margin(zero)) ycommon name(LightHP, replace) imargin(zero) ysize(2) iscale(*2)
graph export Replication/LightHP.png, width(2000) replace
}
